Load libraries

library(tidyverse)
library(scran)
library(scater)
library(gprofiler2)
library(gridExtra)
library(ComplexHeatmap)
library(circlize)
library(edgeR)
library(readxl)
library(ggrepel)
library(tftargets)
library(network)
library(ggnet)
library(parallel)
library(BiocParallel)
library(ggsci)
library(airr)
library(numbat)
library(scales)

ncores <- 10
mcparam <- MulticoreParam(workers=ncores)
register(mcparam)
opt <- list()
opt$pathinfo <- "misc/pathways_2.xlsx"
opt$scesc <- "data/submission/scRNA_complete.RDS"
opt$scescpat <- "data/submission/scRNA_per_pat_complete.RDS"
#opt$types <- "data/loop2020_celltypes.csv"
#opt$immtab <- "data/loop2020_table_heavy_light_contr_mut_burden_tcr.tsv"
opt$plot <- "plots/Fig4/"

## Set theme
lgd <-  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), 
        text = element_text(size = 15),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA), 
        legend.background = element_rect(fill='transparent',colour = NA), 
        strip.background = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank())

Load data

## scRNA-Seq data
sce.sc <- readRDS(opt$scesc)

## single-cell cell types
types <- sce.sc[["types"]]

## Load tcr data
immTab <- sce.sc[["immTab"]]

sce.sc <- sce.sc[["SCE"]]

## sce per pat
sce.pat <- readRDS(opt$scescpat)

## Load pathway annotation
path.info <- read_excel(opt$pathinfo)

Overview scRNA-seq

## Randomly sample 
set.seed(100)
sce.sc <- sce.sc[, sample(1:ncol(sce.sc), ncol(sce.sc))]

## Re-run UMAP
set.seed(100)
sce.sc <- runUMAP(sce.sc, n_neighbors = 15, min_dist = 0.75, name = "UMAP", BPPARAM = mcparam)

## Make plots
plotTab <- ggcells(sce.sc)[[1]]

## Cell type
p <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  ggplot(aes(x = UMAP.1, y = UMAP.2, fill = celltype)) +
  geom_point(shape = 21, size = 2.5, stroke = 0.25) +
  lgd + theme_void() +
  scale_fill_manual(values = c("MCL" = "#F44336", "T-PLL" = "#64B5F6",
                                "healthy T-cell" = "#0D47A1", "NK-cell" = "#AB47BC",
                                "healthy B-cell" = "#FFA000", "HCL" = "#FFD45F",
                               "T-LGL" = "#80DEEA",
                                "monocyte" = "#66BB6A")) +
  theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_celltypes.png"), 
       height = 15, width = 15, units = "cm")

## Treatment
p <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  ggplot(aes(x = UMAP.1, y = UMAP.2, fill = Treatment)) +
  geom_point(shape = 21, size = 2.5, stroke = 0.25) +
  lgd + theme_void() +
  scale_fill_manual(values = c("DMSO" = "grey70", "Birinapant" = "#AD1457", "Nutlin-3a" = "#FFA000", 
                                "Ibrutinib" = "#283593", "Everolimus" = "#43A047", 
                               "Selumetinib" = "#F8BBD0")) +
  theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_treatment.png"), 
       height = 15, width = 15, units = "cm")

## PatientID
p <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  ggplot(aes(x = UMAP.1, y = UMAP.2, fill = patientID)) +
  geom_point(shape = 21, size = 2.5, stroke = 0.25) +
  lgd + theme_void() +
  scale_fill_npg() +
  theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_patientID.png"), 
       height = 15, width = 15, units = "cm")

Show donut plots

plotTab.vdj <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  filter(celltype %in% c("malignant T-cell", "healthy T-cell")) %>%
  mutate(celltype = ifelse(celltype == "malignant T-cell", patientID, celltype), 
         celltype = ifelse(celltype == "healthy T-cell", "Healthy T-Cells", celltype))


plotTab.vdj[plotTab.vdj$celltype == "H371" , "celltype"] <- "T-PLL1"
plotTab.vdj[plotTab.vdj$celltype == "H431", "celltype"] <- "T-PLL2"
plotTab.vdj[plotTab.vdj$celltype == "H279", "celltype"] <- "T-PLL3"
plotTab.vdj[plotTab.vdj$celltype == "H501", "celltype"] <- "T-LGL1"

## Merge VDJ data with plotting info
plotTab.vdj <- plotTab.vdj %>% left_join(select(immTab, cell_id, clone_id, c_call, #mu_freq, sequence_alignment
                                                ), 
                                     by = c("uniqueBarcode" = "cell_id"))

## Calculate the number of clones per cell type
countTab <- plotTab.vdj %>%
  group_by(celltype) %>%
  select(celltype, clone_id) %>% unique() %>%
  dplyr::count(celltype) %>% mutate(n_clones = n) %>%
  select(-n)
countTab
## # A tibble: 6 × 2
## # Groups:   celltype [6]
##   celltype        n_clones
##   <chr>              <int>
## 1 H525                   1
## 2 Healthy T-Cells     1695
## 3 T-LGL1                 1
## 4 T-PLL1                 2
## 5 T-PLL2                 2
## 6 T-PLL3                 2
## Calculate the size of each clone
countSize <- plotTab.vdj %>%
  filter(!is.na(clone_id)) %>%
  group_by(celltype) %>%
  select(celltype, clone_id) %>%
  dplyr::count(celltype, clone_id) %>% mutate(clone_size = n) %>%
  select(-n)
countSize
## # A tibble: 1,697 × 3
## # Groups:   celltype [4]
##    celltype        clone_id      clone_size
##    <chr>           <chr>              <int>
##  1 Healthy T-Cells TCR_1001_250           1
##  2 Healthy T-Cells TCR_1005_512           1
##  3 Healthy T-Cells TCR_1006_1035          1
##  4 Healthy T-Cells TCR_1008               1
##  5 Healthy T-Cells TCR_1008_1376          1
##  6 Healthy T-Cells TCR_1009_1739          1
##  7 Healthy T-Cells TCR_1010_1005          1
##  8 Healthy T-Cells TCR_1011_1629          2
##  9 Healthy T-Cells TCR_1012_1017          1
## 10 Healthy T-Cells TCR_1012_1413          1
## # ℹ 1,687 more rows
###################
# Donut charts of clonal composition
###################
## Calculate the size of each clone - don't subset to only cells with clone_id
# otherwise NA T-cells will not be in the plot later.
cloneCount <- plotTab.vdj %>% 
  group_by(celltype) %>%
  dplyr::count(clone_id) %>% ungroup()
cloneCount
## # A tibble: 1,703 × 3
##    celltype        clone_id          n
##    <chr>           <chr>         <int>
##  1 H525            <NA>              1
##  2 Healthy T-Cells TCR_1001_250      1
##  3 Healthy T-Cells TCR_1005_512      1
##  4 Healthy T-Cells TCR_1006_1035     1
##  5 Healthy T-Cells TCR_1008          1
##  6 Healthy T-Cells TCR_1008_1376     1
##  7 Healthy T-Cells TCR_1009_1739     1
##  8 Healthy T-Cells TCR_1010_1005     1
##  9 Healthy T-Cells TCR_1011_1629     2
## 10 Healthy T-Cells TCR_1012_1017     1
## # ℹ 1,693 more rows
## This script was written for a B-cell analysis. Therefore, some labels 
# might refer to B-cells, but we're looking at T-cells at the moment. 

cloneNum <- lapply(unique(plotTab.vdj$celltype), function(x) {
  subTab <- filter(plotTab.vdj, celltype == x) # subset to each patient
  countTab <- dplyr::count(subTab, clone_id)
  nclones <- countTab %>% nrow() # get the number of clones
  ncells_bcr <- nrow(subTab) # calculate the number of B or T cells per patient - important to subset plotTab to only B or T-cell clusters before doing this
  res <- data.frame(patID = x, nclones = nclones, ncells_bcr = ncells_bcr)
  res$type <- opt$subType
  res
}) %>% bind_rows()
cloneNum
##             patID nclones ncells_bcr
## 1          T-PLL2       2       9642
## 2          T-LGL1       1       5340
## 3          T-PLL1       2       8169
## 4          T-PLL3       2       6778
## 5 Healthy T-Cells    1695       3032
## 6            H525       1          1
df <- left_join(cloneCount, cloneNum, by = c("celltype" = "patID")) %>%
  group_by(celltype) %>% mutate(clone_ratio = n / ncells_bcr) %>% # divide the size of the clone 
  select(celltype, clone_id, clone_ratio, nclones) %>% unique() %>%
  mutate(ymax = cumsum(clone_ratio), ymin = c(0, head(ymax, n= -1)))
df
## # A tibble: 1,703 × 6
## # Groups:   celltype [6]
##    celltype        clone_id      clone_ratio nclones     ymax     ymin
##    <chr>           <chr>               <dbl>   <int>    <dbl>    <dbl>
##  1 H525            <NA>             1              1 1        0       
##  2 Healthy T-Cells TCR_1001_250     0.000330    1695 0.000330 0       
##  3 Healthy T-Cells TCR_1005_512     0.000330    1695 0.000660 0.000330
##  4 Healthy T-Cells TCR_1006_1035    0.000330    1695 0.000989 0.000660
##  5 Healthy T-Cells TCR_1008         0.000330    1695 0.00132  0.000989
##  6 Healthy T-Cells TCR_1008_1376    0.000330    1695 0.00165  0.00132 
##  7 Healthy T-Cells TCR_1009_1739    0.000330    1695 0.00198  0.00165 
##  8 Healthy T-Cells TCR_1010_1005    0.000330    1695 0.00231  0.00198 
##  9 Healthy T-Cells TCR_1011_1629    0.000660    1695 0.00297  0.00231 
## 10 Healthy T-Cells TCR_1012_1017    0.000330    1695 0.00330  0.00297 
## # ℹ 1,693 more rows
#### Overview over all patients/celltypes

pClone.Pat <- df %>% 
  filter(celltype %in% c("T-PLL1", "T-PLL2", "T-PLL3", "Healthy T-Cells")) %>%
  ggplot(aes(ymax = ymax, ymin = ymin, xmax=4, xmin=3, fill = clone_id)) +
  geom_rect(colour = "black", size = 0.25) +
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  # ggtitle(paste0("Clonal Composition per Patient - ", opt$subType)) +
  facet_wrap(. ~ celltype, nrow =  1) +
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5),
        strip.background = element_blank(),
        strip.text.x = element_text(size = 12.5),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
pClone.Pat

#### Output individual patients
subPat <- c("T-PLL1", "T-PLL2", "T-PLL3", "Healthy T-Cells")

lapply(subPat, function(subPat) {
  pClone <- df %>% filter(celltype == subPat) %>%
  ggplot(aes(ymax = ymax, ymin = ymin, xmax=4, xmin=3, fill = clone_id)) +
  geom_rect(colour = "black", size = 0.25) +
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  ggtitle(paste0(subPat)) +
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5, size = 75),
        legend.position = "none")
  pClone
  
  ggsave(plot = pClone, filename = paste0(opt$plot, "tcr_comp_", subPat, ".png"), 
         height = 20, width = 20, units = "cm")
})
## [[1]]
## [1] "plots/Fig4/tcr_comp_T-PLL1.png"
## 
## [[2]]
## [1] "plots/Fig4/tcr_comp_T-PLL2.png"
## 
## [[3]]
## [1] "plots/Fig4/tcr_comp_T-PLL3.png"
## 
## [[4]]
## [1] "plots/Fig4/tcr_comp_Healthy T-Cells.png"

Perform differential gene expression testing - scRNA-Seq

opt$subsample <- TRUE

compDrug <- c("Nutlin-3a", "Birinapant", "Ibrutinib", "Everolimus", "Selumetinib")

sce_sub <- sce.sc
plotTab <- ggcells(sce_sub)[[1]]

colData(sce_sub) <- colData(sce_sub) %>%
  data.frame() %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "Healthy T-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H501") & celltype %in% c("MCL", "HCL", "T-PLL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H431", "H371", "H279") & celltype %in% c("MCL", "HCL", "T-LGL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H526", "H525") & celltype %in% c("MCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H496", "H432", "P1029") & celltype %in% c("HCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
  DataFrame()


if(opt$subsample == TRUE) {
 cellSub <- c("HCL", "MCL", "T-PLL", "Healthy T-Cells") 
} else {
  cellSub <- c("HCL", "MCL", "T-PLL")
}

testTreat <- c("Birinapant", "Nutlin-3a", "Ibrutinib", "Everolimus", "Selumetinib")

countTab <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "Healthy T-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
  group_by(celltype, Treatment) %>%
  dplyr::count() %>%
  filter(celltype %in% cellSub, Treatment %in% testTreat) %>%
  arrange(n)

minCells <- countTab[1, ]$n
minCells
## [1] 287
de.res.2020 <- lapply(testTreat, function(z) {
  message(paste0("Fitting model for ", z))
  lapply(cellSub, #mc.cores = ncores, 
         function(x) {
    message(paste0("Celltype ", x))
    sce.x <- sce_sub[, sce_sub$celltype == x]
    sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", testTreat)]
    message(ncol(sce.x), " cells")
    
    if(opt$subsample == TRUE) {
      message("Subsetting to ", minCells)
      
      sce.x <- do.call(cbind, lapply(unique(sce.x$Treatment), function(y) {
        sce.y <- sce.x[, sce.x$Treatment == y]
        
        if(ncol(sce.y) == minCells) {sce.y} else {
          set.seed(101)
          sce.y <- sce.y[,sample(1:ncol(sce.y), minCells)]
          sce.y
        }
        sce.y
      }))
    }
 
    summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")], 
                                    statistics = "sum", assay.type = "counts") # get raw counts and sum them up
    #  summed <- applySCE(sce.x, aggregateAcrossCells, ids=colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")])
    colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
    y <- DGEList(assay(summed), samples = colData(summed))
    opt$subsample
    if(opt$subsample == TRUE) {
      discarded <- summed$ncells < 5
    } else {
      discarded <- summed$ncells < 30
    }
    y <- y[,!discarded]
    message(paste0("Discarding ", sum(discarded), " samples"))
    design <- model.matrix(~0 + factor(patientID) + Treatment, y$samples) 

    keep <- filterByExpr(y, design = design)
    y <- y[keep,]
    summary(keep)
    message(paste0("Keeping ", sum(keep), " genes"))
    y <- calcNormFactors(y)
    # head(design)
    y <- estimateDisp(y, design) # gene-wise dispersion (variance on top of poisson distribution)
    #plotBCV(y)
    fit <- glmQLFit(y, design, robust=TRUE) # QL way of fitting the model
    #plotQLDisp(fit)
    res <- glmQLFTest(fit, coef = paste0("Treatment", z))
    # #res <- glmQLFTest(fit, coef = "TreatmentNutlin-3a")
    summary(decideTests(res))
    resTab <- res$table
    resTab$gene <- rownames(resTab)
    resTab$celltype <- x
    resTab <- resTab #%>%
      #mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj))
    resTab$Treatment <- z
    resTab %>% arrange(desc(logFC)) %>% mutate(p.adj = p.adjust(PValue, method = "BH"))
  }) %>% bind_rows()
}) %>% bind_rows()

de.res.2020 <- de.res.2020 %>%
  left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("gene" = "ID"))

Show number of DEGs

################################################################################
# Show number of DEGs
################################################################################

plotMat <- de.res.2020 %>%
  #filter(PValue < 0.05) %>%
  filter(p.adj < 0.1) %>%
  filter(logFC > 0.25 | logFC < -0.25) %>%
  mutate(dir = ifelse(logFC < 0, "Down", "Up")) %>%
  group_by(celltype, dir, Treatment) %>%
  dplyr::rename(Direction = dir) %>%
  dplyr::count()

plotMat <- plotMat %>%
  mutate(n = ifelse(Direction == "Down", n * -1, n)) 

p <- plotMat %>%
  #mutate(celltype = factor(celltype, levels = c("HCL", "MCL", "T-PLL & T-LGL", "T- & NK-cells"))) %>%
  mutate(celltype = factor(celltype, levels = c("HCL", "MCL", "T-PLL", "Healthy T-Cells"))) %>%
  ggplot(aes(x = celltype, y = n, fill = Direction)) +
  geom_bar(colour = "black", stat = "identity") +
  geom_hline(yintercept = 1) +
  geom_text(aes(label = n), nudge_y = 50, data = plotMat[plotMat$Direction == "Up", ], size = 5) +
  geom_text(aes(label = n), nudge_y = -50, data = plotMat[plotMat$Direction == "Down", ], size = 5) +
  xlab("") + ylab("Number of Genes") + ggtitle("DEGs per Cell Type") +
  lgd + scale_fill_manual(values = c("Up" = "#F44336", "Down" = "#1976D2")) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), 
        text = element_text(size = 22.5), 
        panel.border = element_rect(linewidth = 1)) +
  facet_wrap(. ~ Treatment, nrow = 1)
p

ggsave(plot = p, filename = paste0(opt$plot, "n_deg_per_celltype.png"), 
       height = 15, width = 50, units = "cm")

Show volcano plots

geneSelec <- path.info %>% filter(pathway %in% c("NFKB", "NFKB_Chemo", "NFKB_GF", "NFKB_TF", "NFKB_Apo", "TNF"#, "TLR"
                                                 )) %>%
  select(gene) %>% unlist() %>% as.vector()

lapply(c("MCL", "HCL", "T-PLL", "Healthy T-Cells"), function(y) {
  pList <- lapply(c("Birinapant", "Everolimus", "Ibrutinib")[1], function(x) {

  volTab <- de.res.2020 %>% filter(Treatment == x, celltype == y) %>% #%>% dplyr::rename(Symbol = gene)
    mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj)) %>% arrange(p.adj)
  
  ## Define limits
  minLim <- -1 * max(abs(de.res.2020$logFC))
  maxLim <- max(abs(de.res.2020$logFC))
  thresh <- 0.25

  volTab$diffCol <- NA
  volTab[volTab$logFC > 0 & volTab$p.adj < 0.1 & volTab$logFC > thresh, "diffCol"] <- "sigUp"
  volTab[volTab$logFC < 0 & volTab$p.adj < 0.1 & volTab$logFC < -thresh, "diffCol"] <- "sigDown"
  volTab[volTab$p.adj >= 0.1, "diffCol"] <- "ns"
  volTab[volTab$logFC < thresh & volTab$logFC > -(thresh), "diffCol"] <- "ns"

  p <- volTab %>%
    mutate(logFC = ifelse(logFC > maxLim, maxLim, logFC)) %>%
    mutate(logFC = ifelse(logFC < minLim, minLim, logFC)) %>%
    #filter(gene %in% geneSelec) %>%
    filter(Treatment == x) %>%
    ggplot(aes(x = logFC, y = -log10(p.adj), fill = diffCol)) +
    geom_point(shape = 21, size = 2) + ggtitle(paste0(y, " - ", x)) +
    xlab("Log2FC") + ylab("-Log10(p.adj)") +
    xlim(-5, 5) + ylim(0, 12.75) +
    geom_hline(yintercept = -log10(0.1), linetype = "dashed") +
    geom_vline(xintercept = -thresh, linetype = "dashed") + geom_vline(xintercept = thresh, linetype = "dashed") +
    geom_label_repel(data = filter(volTab, Symbol %in% c(geneSelec), Treatment %in% x, p.adj < 0.1,
                                 logFC > thresh | logFC < -(thresh)), aes(label = Symbol),
                    nudge_y = 0.6, size = 5, max.overlaps = getOption("ggrepel.max.overlaps", default = 30),
                     segment.linetype = 2, force = 20, alpha = 0.8, fill = "white") +
    scale_fill_manual(values = c("sigUp" = "#F44336", "sigDown" = "#1976D2", "ns" = "grey")) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5),
          legend.position = "none",
          text = element_text(size = 17.5),
          axis.text = element_text(size = 15),
          panel.background = element_rect(fill = "transparent",colour = NA),
          plot.background = element_rect(fill = "transparent",colour = NA),
          legend.background = element_rect(fill='transparent'),
          strip.background = element_blank())

   ggsave(plot = p, filename = paste0(opt$plot, "vol_", y, "_", x, ".png"),
          height = 15, width = 15, unit = "cm")
  p
  })
  grid.arrange(grobs = pList)
})
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## [[1]]
## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 
## [[2]]
## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 
## [[3]]
## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 
## [[4]]
## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]

Load per patient

patSub <- names(sce.pat)
sce.pat <- mclapply(patSub, mc.cores = ncores, function(x) {
  sce <- sce.pat[[x]]

  set.seed(100)
  sce <- runUMAP(sce, n_neighbors = 15, min_dist = 0.35, name = "UMAP", BPPARAM = mcparam)

})
names(sce.pat) <- patSub


patSub <- c("H371", "H431", "H279")

lapply(patSub, function(x) {
  sce <- sce.pat[[x]]
  
  set.seed(100)
  sce <- sce[,sample(1:ncol(sce), ncol(sce))]
  lapply(c("Birinapant"), function(y) {
    plotTab <- ggcells(sce)[[1]]
    
  if(x == "H371") {tlt <- "T-PLL1"
  } else if(x == "H431") {tlt <- "T-PLL2"
  } else if(x == "H279") {tlt <- "T-PLL3"} else {
    tlt <- x
  }

    p <- plotTab %>% 
      filter(Treatment %in% c("DMSO", y)) %>%
      ggplot(aes(x = UMAP.1, y = UMAP.2, fill = Treatment)) +
      geom_point(shape = 21, size = 3) +
      ggtitle(tlt) +
      lgd +
      theme_void() +
      theme(axis.text = element_blank(), 
            axis.ticks = element_blank(), 
            legend.position = "none", 
            plot.title = element_text(size = 20, hjust = 0.5)) +
      scale_fill_manual(values = c("DMSO" = "grey70", "Birinapant" = "#AD1457", "Nutlin-3a" = "#FFA000", 
                                   "Ibrutinib" = "#283593", "Everolimus" = "#43A047", 
                                   "Selumetinib" = "#F8BBD0"))
    
      ggsave(plot = p, filename = paste0(opt$plot, x, "_perpat_", y, "_umap.png"), 
         height = 12.5, width = 12.5, units = "cm")
      p
  })
})
## [[1]]
## [[1]][[1]]

## 
## 
## [[2]]
## [[2]][[1]]

## 
## 
## [[3]]
## [[3]][[1]]

lapply(c("NFKBIA"), function(y) {
  
  lapply(c("Birinapant"), function(x) {
    lapply(patSub, function(z) {
        sce <- sce.pat[[z]]
        rownames(sce) <- rowData(sce)$Symbol
        plotTab <- ggcells(sce, features = y)[[1]]
      
    plotTab <- plotTab %>% 
      filter(Treatment %in% c("DMSO", x)) %>%
      mutate(Treatment = factor(Treatment, levels = c("DMSO", x))) %>%
      pivot_longer(cols = y, names_to = "gene", values_to = "expr") 
  
    p <- plotTab %>%
        ggplot(aes(x = UMAP.1, y = UMAP.2, fill = expr)) +
        geom_point(shape = 21, size = 3) +
        lgd +
        theme_void() +
        theme(axis.text = element_blank(),
              axis.ticks = element_blank(),
              legend.position = "none", 
              strip.text = element_text(size = 20, hjust = 0.5)) +
        scale_fill_gradient(high = "red", low = "grey80",
                            limits=c(0, quantile(plotTab$expr, 0.99)), oob=squish) +
      facet_wrap(. ~ Treatment)

      ggsave(plot = p, filename = paste0(opt$plot, "perpat/", x, "_perpat_", y, "_", z, "_umap.png"),
         height = 12.5, width = 27.5, units = "cm")
      
      p
      
    })
  })
})
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(y)
## 
##   # Now:
##   data %>% select(all_of(y))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## [[1]]
## [[1]][[1]]
## [[1]][[1]][[1]]

## 
## [[1]][[1]][[2]]

## 
## [[1]][[1]][[3]]

Perform differential gene expression testing - scRNA-Seq - without subsampling

opt$subsample <- FALSE

compDrug <- c("Nutlin-3a", "Birinapant", "Ibrutinib", "Everolimus", "Selumetinib")

sce_sub <- sce.sc
plotTab <- ggcells(sce_sub)[[1]]

colData(sce_sub) <- colData(sce_sub) %>%
  data.frame() %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "T-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H501") & celltype %in% c("MCL", "HCL", "T-PLL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H431", "H371", "H279") & celltype %in% c("MCL", "HCL", "T-LGL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H526", "H525") & celltype %in% c("MCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
  mutate(celltype = ifelse(patientID%in% c("H496", "H432", "P1029") & celltype %in% c("HCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
  DataFrame()


if(opt$subsample == TRUE) {
 cellSub <- c("HCL", "MCL", "T-PLL", "T-Cells") 
} else {
  cellSub <- c("HCL", "MCL", "T-PLL")
}

testTreat <- c("Birinapant", "Nutlin-3a", "Ibrutinib", "Everolimus", "Selumetinib")

countTab <- plotTab %>%
  left_join(types, by = "uniqueBarcode") %>%
  mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "T-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
  mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
  group_by(celltype, Treatment) %>%
  dplyr::count() %>%
  filter(celltype %in% cellSub, Treatment %in% testTreat) %>%
  arrange(n)

minCells <- countTab[1, ]$n
minCells
## [1] 773
de.res.2020 <- lapply(testTreat, function(z) {
  message(paste0("Fitting model for ", z))
  lapply(cellSub, #mc.cores = ncores, 
         function(x) {
    message(paste0("Celltype ", x))
    sce.x <- sce_sub[, sce_sub$celltype == x]
    sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", testTreat)]
    message(ncol(sce.x), " cells")
    
    if(opt$subsample == TRUE) {
      message("Subsetting to ", minCells)
      
      sce.x <- do.call(cbind, lapply(unique(sce.x$Treatment), function(y) {
        sce.y <- sce.x[, sce.x$Treatment == y]
        
        if(ncol(sce.y) == minCells) {sce.y} else {
          set.seed(99)
          sce.y <- sce.y[,sample(1:ncol(sce.y), minCells)]
          sce.y
        }
        sce.y
      }))
    }
    
    summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")], 
                                    statistics = "sum", assay.type = "counts") # get raw counts and sum them up
    #  summed <- applySCE(sce.x, aggregateAcrossCells, ids=colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")])
    colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
    y <- DGEList(assay(summed), samples = colData(summed))
    opt$subsample
    if(opt$subsample == TRUE) {
      discarded <- summed$ncells < 5
    } else {
      discarded <- summed$ncells < 30
    }
    y <- y[,!discarded]
    message(paste0("Discarding ", sum(discarded), " samples"))
    design <- model.matrix(~0 + factor(patientID) + Treatment, y$samples) # set coefficient (name or matrix column) - fig. 7 f1000

    keep <- filterByExpr(y, design = design)
    y <- y[keep,]
    summary(keep)
    message(paste0("Keeping ", sum(keep), " genes"))
    y <- calcNormFactors(y)
    # head(design)
    y <- estimateDisp(y, design) # gene-wise dispersion (variance on top of poisson distribution)
    #plotBCV(y)
    fit <- glmQLFit(y, design, robust=TRUE) # QL way of fitting the model
    #plotQLDisp(fit)
    res <- glmQLFTest(fit, coef = paste0("Treatment", z))
    # #res <- glmQLFTest(fit, coef = "TreatmentNutlin-3a")
    summary(decideTests(res))
    resTab <- res$table
    resTab$gene <- rownames(resTab)
    resTab$celltype <- x
    resTab <- resTab #%>%
      #mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj))
    resTab$Treatment <- z
    resTab %>% arrange(desc(logFC)) %>% mutate(p.adj = p.adjust(PValue, method = "BH"))
  }) %>% bind_rows()
}) %>% bind_rows()

de.res.2020 <- de.res.2020 %>%
  left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("gene" = "ID"))

Heatmap with logFC per patient

opt$subsample <- FALSE

diffMat.list <- lapply(c("Birinapant"), function(z) {
  message(paste0("Fitting model for ", z))
  sce.x <- sce_sub[, sce_sub$celltype %in% c("T-PLL", "T-LGL", "HCL", "MCL")]
  colData(sce.x) <- colData(sce.x) %>%
    data.frame() %>%
    mutate(celltype = ifelse(celltype %in% c("T-PLL", "T-LGL", "HCL", "MCL"), patientID, celltype)) %>%
    DataFrame()
  sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", z)]
  summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("celltype", "Treatment")], 
                                  statistics = "sum", assay.type = "counts") # get raw counts and sum them up

    colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
    y <- DGEList(assay(summed), samples = colData(summed))
    opt$subsample
    if(opt$subsample == TRUE) {
      discarded <- summed$ncells < 10
    } else {
      discarded <- summed$ncells < 30
    }
    y <- y[,!discarded]
    message(paste0("Discarding ", sum(discarded), " samples"))
    design <- model.matrix(~0 + factor(celltype) + Treatment, y$samples) # set coefficient (name or matrix column) - fig. 7 f1000

    keep <- filterByExpr(y, design = design)
    y <- y[keep,]
    summary(keep)
    message(paste0("Keeping ", sum(keep), " genes"))
    y <- calcNormFactors(y)
    count.mat <- cpm(y, log = TRUE)
    colnames(count.mat) <- paste0(y$samples$celltype, "_", y$samples$Treatment)
    count.mat
  
    
      ## Compute the logFC per patient
  diffMat <- lapply(unique(sce.x$celltype), function(x) {
    message(x)
    ## Subset to the samples of this patient
    matSub <- count.mat[, str_detect(string = colnames(count.mat), pattern = x)]
    
    ## Save gene names
    matSub <- matSub %>% data.frame() %>% rownames_to_column("gene")
    matSub
    ## Compute logFC
    if(ncol(matSub) == 3) {
      colnames(matSub)[str_detect(string = colnames(matSub), pattern = "DMSO")] <- "DMSO"
      colnames(matSub)[str_detect(string = colnames(matSub), pattern = gsub(x = x, pattern = "-", replacement = "."))] <- "Treatment"

      matSub <- matSub %>%
        mutate(logFC = Treatment - DMSO) %>%
        select(gene, logFC)
    } else {
      matSub <- data.frame(gene = matSub$gene, logFC = NA)
    }
    colnames(matSub)[str_detect(string = colnames(matSub), pattern = "logFC")] <- x
    matSub %>% column_to_rownames("gene")
  }) %>% bind_cols()
})
## Fitting model for Birinapant
## Discarding 0 samples
## Keeping 12788 genes
## H431
## H526
## H496
## H371
## H501
## H279
## H525
## H432
## H452
names(diffMat.list) <- c("Birinapant")

geneSelec <- path.info %>% filter(pathway %in% c("NFKB", "NFKB_Chemo", "NFKB_GF", "NFKB_TF", "NFKB_Apo", "TNF"#, "TLR"
                                                 )) %>% 
  select(gene) %>% unlist() %>% as.vector()


patLabel <- data.frame(patientID = c("H371", "H431", "H279", "H501", "H452", "H432", "H496", "H525", "H526", "T-Cells"), 
                       tumorType = c("T-PLL1", "T-PLL2", "T-PLL3", "T-LGL1", "MCL1", "MCL2", "MCL3", "HCL1", "HCL2", "T-Cells"))

pHeat <- lapply(c("Birinapant")[[1]], function(x) {
  plotMat <- diffMat.list[[x]]
  
  if(x == "Nutlin-3a") {
    geneSelec <- path.info %>% filter(pathway %in% c("TP53up")) %>% select(gene) %>% unlist() %>% as.vector()
  }
  
  plotMat <- plotMat %>% rownames_to_column("ID") %>%
    left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("ID")) %>%
    dplyr::select(-ID) %>% filter(Symbol %in% geneSelec) %>% column_to_rownames("Symbol")
  
  geneSelec <- intersect(rownames(plotMat), geneSelec)
  
  geneSig <- de.res.2020 %>% filter(p.adj < 0.1, Treatment %in% x, celltype %in% c("T-PLL", "MCL", "HCL")) %>% 
    filter(logFC > 0.25 | logFC < -0.25) %>%
    pull(Symbol) %>% unique()
  
  geneSelec <- intersect(geneSig, geneSelec)
  plotMat <- t(plotMat[geneSelec, ])
  rownames(plotMat) <- patLabel[match(rownames(plotMat), patLabel$patientID), ]$tumorType
  
  pHeat <- Heatmap(t(plotMat), clustering_method_rows = "ward.D2", 
          clustering_method_columns = "ward.D2", 
          show_row_names = TRUE,
          cluster_rows = TRUE, border = TRUE, #column_names_gp = gpar(fontsize = 8),
          column_title = paste0(x, " - Log2FC Malignant Cells"), 
          rect_gp = gpar(col = "black", lwd = 0.1),
          cluster_columns = TRUE, #row_names_gp = gpar(fontsize = 7),
          col=colorRamp2(c(-2, 0, 2), colors = c("#1976D2", "white", "#F44336")), name = "Log2FC")
  
  png(file=paste0(opt$plot, x, "_logFC_heatmap.png"),
      height = 25, width = 12.5, res = 600, units = "cm", bg = "transparent")
  draw(pHeat, background = "transparent")
  dev.off()
  
  pHeat
  
})
pHeat
## [[1]]

Output session info

## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
##  [1] parallel  grid      stats4    stats     graphics  grDevices datasets 
##  [8] utils     methods   base     
## 
## other attached packages:
##  [1] scales_1.3.0                numbat_1.4.0               
##  [3] Matrix_1.6-1.1              airr_1.5.0                 
##  [5] ggsci_3.2.0                 BiocParallel_1.36.0        
##  [7] ggnet_0.1.0                 network_1.18.2             
##  [9] tftargets_1.3               ggrepel_0.9.5              
## [11] readxl_1.4.3                edgeR_4.0.16               
## [13] limma_3.58.1                circlize_0.4.16            
## [15] ComplexHeatmap_2.18.0       gridExtra_2.3              
## [17] gprofiler2_0.2.3            scater_1.30.1              
## [19] scran_1.30.2                scuttle_1.12.0             
## [21] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
## [23] Biobase_2.62.0              GenomicRanges_1.54.1       
## [25] GenomeInfoDb_1.38.8         IRanges_2.36.0             
## [27] S4Vectors_0.40.2            BiocGenerics_0.48.1        
## [29] MatrixGenerics_1.14.0       matrixStats_1.3.0          
## [31] lubridate_1.9.3             forcats_1.0.0              
## [33] stringr_1.5.1               dplyr_1.1.4                
## [35] purrr_1.0.2                 readr_2.1.5                
## [37] tidyr_1.3.1                 tibble_3.2.1               
## [39] ggplot2_3.5.1               tidyverse_2.0.0            
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22          splines_4.3.2            
##   [3] bitops_1.0-7              ggplotify_0.1.2          
##   [5] cellranger_1.1.0          polyclip_1.10-7          
##   [7] lifecycle_1.0.4           doParallel_1.0.17        
##   [9] lattice_0.21-9            MASS_7.3-60              
##  [11] magrittr_2.0.3            plotly_4.10.4            
##  [13] sass_0.4.9                rmarkdown_2.27           
##  [15] jquerylib_0.1.4           yaml_2.3.9               
##  [17] metapod_1.10.1            RColorBrewer_1.1-3       
##  [19] abind_1.4-5               zlibbioc_1.48.2          
##  [21] quadprog_1.5-8            hahmmr_1.0.0             
##  [23] ggraph_2.2.1              RCurl_1.98-1.14          
##  [25] yulab.utils_0.1.5         tweenr_2.0.3             
##  [27] GenomeInfoDbData_1.2.11   irlba_2.3.5.1            
##  [29] tidytree_0.4.6            dqrng_0.4.1              
##  [31] DelayedMatrixStats_1.24.0 codetools_0.2-19         
##  [33] DelayedArray_0.28.0       ggforce_0.4.2            
##  [35] tidyselect_1.2.1          shape_1.4.6.1            
##  [37] aplot_0.2.3               farver_2.1.2             
##  [39] ScaledMatrix_1.10.0       viridis_0.6.5            
##  [41] jsonlite_1.8.8            GetoptLong_1.0.5         
##  [43] BiocNeighbors_1.20.2      tidygraph_1.3.1          
##  [45] iterators_1.0.14          systemfonts_1.1.0        
##  [47] foreach_1.5.2             tools_4.3.2              
##  [49] ragg_1.3.2                treeio_1.29.0.002        
##  [51] Rcpp_1.0.12               glue_1.7.0               
##  [53] SparseArray_1.2.4         xfun_0.45                
##  [55] withr_3.0.0               fastmap_1.2.0            
##  [57] bluster_1.12.0            fansi_1.0.6              
##  [59] digest_0.6.36             rsvd_1.0.5               
##  [61] parallelDist_0.2.6        timechange_0.3.0         
##  [63] R6_2.5.1                  gridGraphics_0.5-1       
##  [65] textshaping_0.4.0         colorspace_2.1-0         
##  [67] Cairo_1.6-2               RhpcBLASctl_0.23-42      
##  [69] utf8_1.2.4                generics_0.1.3           
##  [71] renv_1.0.7                data.table_1.15.4        
##  [73] graphlayouts_1.1.1        httr_1.4.7               
##  [75] htmlwidgets_1.6.4         S4Arrays_1.2.1           
##  [77] uwot_0.2.2                pkgconfig_2.0.3          
##  [79] gtable_0.3.5              XVector_0.42.0           
##  [81] htmltools_0.5.8.1         clue_0.3-65              
##  [83] png_0.1-8                 ggfun_0.1.5              
##  [85] knitr_1.48                rstudioapi_0.16.0        
##  [87] tzdb_0.4.0                rjson_0.2.21             
##  [89] coda_0.19-4.1             statnet.common_4.9.0     
##  [91] nlme_3.1-163              cachem_1.1.0             
##  [93] GlobalOptions_0.1.2       vipor_0.4.7              
##  [95] pillar_1.9.0              logger_0.3.0             
##  [97] vctrs_0.6.5               BiocSingular_1.18.0      
##  [99] beachmat_2.18.1           cluster_2.1.4            
## [101] beeswarm_0.4.0            evaluate_0.24.0          
## [103] cli_3.6.3                 locfit_1.5-9.10          
## [105] compiler_4.3.2            rlang_1.1.4              
## [107] crayon_1.5.3              labeling_0.4.3           
## [109] fs_1.6.4                  ggbeeswarm_0.7.2         
## [111] stringi_1.8.4             viridisLite_0.4.2        
## [113] munsell_0.5.1             lazyeval_0.2.2           
## [115] hms_1.1.3                 patchwork_1.2.0          
## [117] sparseMatrixStats_1.14.0  statmod_1.5.0            
## [119] highr_0.11                igraph_2.0.3             
## [121] memoise_2.0.1             scistreer_1.2.0          
## [123] RcppParallel_5.1.8        bslib_0.7.0              
## [125] phangorn_2.11.1           ggtree_3.10.1            
## [127] fastmatch_1.1-4           ape_5.8